home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
TeX 1995 July
/
TeX CD-ROM July 1995 (Disc 1)(Walnut Creek)(1995).ISO
/
biblio
/
bibtex
/
utils
/
refer-tools
/
refer2bibtex.pl-0.9.0
< prev
next >
Wrap
Text File
|
1993-08-28
|
64KB
|
1,640 lines
#!/usr/local/bin/perl
#
# r2b : convert a refer database to a BiBTeX database
# Copyright 1992, 1993 by Dana Jacobsen (jacobsd@cs.orst.edu)
#
#version = "0.1.1";# 17 Apr 92 jacobsd Wrote original version
#version = "0.2.0";# 20 Apr 92 jacobsd Added tib support
#version = "0.3.0";# 21 Apr 92 jacobsd Rewrote heuristics
#version = "0.4.0";# 22 Apr 92 jacobsd Revamped the rofftotex stuff
#version = "0.5.2";# 24 Apr 92 jacobsd some cleanup
#version = "0.6.0";# 25 Apr 92 jacobsd understands names
#version = "0.6.1";# 26 Apr 92 jacobsd cleanup
#version = "0.6.2";# 27 Apr 92 jacobsd added support for a few more fields
#version = "0.6.3";# 27 Apr 92 jacobsd little more tib support
#version = "0.6.4";# 27 Apr 92 jacobsd added Roman-8 chars and more options
#version = "0.6.5";# 27 Apr 92 jacobsd integrated error routine
#version = "0.7.0";# 2 May 92 jacobsd added groff chars and fixed bugs
#version = "0.7.1";# 2 May 92 jacobsd fixed a few more things
#version = "0.7.2";# 10 Aug 92 jacobsd changed key generation
#version = "0.7.3";# 16 Aug 92 jacobsd added ISBN, 2 overstrikes, -q
#version = "0.7.4";# 19 Aug 92 jacobsd overstrike, changes for proceedings
#version = "0.7.5";# 20 Aug 92 jacobsd efficiency moves, month abbrevs
#version = "0.7.6";# 29 Aug 92 jacobsd added eqn flag
#version = "0.7.7";# 2 Sep 92 jacobsd changed name, edition, report parsing
#version = "0.8.0";# 4 Sep 92 jacobsd added date and option field to header
#version = "0.8.1";# 7 Sep 92 jacobsd added ibm option, corrected ms macros
#version = "0.8.2";# 5 Oct 92 jacobsd fixed -ms/-mm macro confusion (again)
#version = "0.8.3";# 5 Oct 92 jacobsd parsedate, edition, movements
#version = "0.8.4";# 8 Oct 92 jacobsd added \s point size changing
#version = "0.8.5";# 14 May 93 jacobsd literals, parsename, font changing
$version = "0.9.0";# 20 May 93 jacobsd
#
# todo: final debugging for release
#
# All bug-fixes, suggestions, flames, and compliments gladly accepted.
#
# These are site selected.
#
# Site-selected limits:
#   $maxflength - fields longer than this are truncated before output,
#                 because BibTeX does not want longer lines.
#   $maxllength - longest name part used in a generated label (the decade
#                 digits are appended after it).
#   $prcontents - non-zero to print the contents (%Y) field verbatim.
($maxflength, $maxllength, $prcontents) = (2950, 14, 0);

# Program defaults; each can be overridden from the command line.
#
# Conversions that are enabled unless switched off:
$roffconv    = 1; # -n             : skip the roff-to-TeX conversion
$protectTeX  = 1; # -noprotect     : leave TeX special characters unprotected
$nameconv    = 1; # -noname-conv   : leave names as written
$capprotect  = 1; # -nocap-protect : set to 0;  -cap-protect : set to 2
# Behaviour that is disabled unless requested:
$ibmconv     = 0; # -ibm            : convert ibm graphics characters
$nowarnings  = 0; # -q              : suppress warnings (-qq gives 2)
$tibfmt      = 0; # -tib            : input is in tib bibliography format
$overstrike  = 0; # -overstrike     : allow \:o = \(:o. European troff??
$handleeqn   = 0; # -eqn            : handle some eqn @@ delimited constructs
$ignorelabel = 0; # -ignorelabel    : don't use the L field for the citekey
$deroffonly  = 0; # -deroff-only
$revauthor   = 0; # -reverse-author

# Bookkeeping: the command line as typed, and the error total for all files.
$convertcommand = '';
$toterrors = 0;
# Walk the command line.  Every argument seen is also recorded in
# $convertcommand so the output header can show how the file was made.
# The first pattern that matches wins; anything that matches no option is
# taken to be an input file.  "--" ends option processing: the arguments
# after it all become input files (and are not recorded).
while (@ARGV) {
  $_ = shift @ARGV;
  $convertcommand .= ' ' . $_;
  if (/^--$/) {
    push(@files, @ARGV);
    undef @ARGV;
  } elsif (/^-n$/) {
    $roffconv = 0;
  } elsif (/^-ibm/) {
    $ibmconv = 1;
  } elsif (/^-q$/) {
    $nowarnings = 1;
  } elsif (/^-qq$/) {
    $nowarnings = 2; # this turns off ALL messages
  } elsif (/^-tib/) {
    $tibfmt = 1;
  } elsif (/^-ove/) {
    $overstrike = 1;
  } elsif (/^-eqn/) {
    $handleeqn = 1;
  } elsif (/^-der/) {
    $deroffonly = 1;
  } elsif (/^-non/) {
    $nameconv = 0;
  } elsif (/^-rev/) {
    $revauthor = 1;
  } elsif (/^-noc/) {
    $capprotect = 0;
  } elsif (/^-cap/) {
    $capprotect = 2;
  } elsif (/^-ign/) {
    $ignorelabel = 1;
  } elsif (/^-nop/) {
    $protectTeX = 0;
  } else {
    push (@files, $_);
  }
}
# With no input files named, fall back to "-".
push (@files, "-") if $#files == -1;
# Write the comment header that starts the BibTeX output: the input format,
# the converter version, the time of conversion and the command line used.
print "%\n";
print "% converted from ", ($tibfmt ? "tib" : "refer");
print " format by refer-to-bibtex $version";
@tarr = localtime(time);
# convert month from numeric to textual
$tarrmon = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
            'Sep', 'Oct', 'Nov', 'Dec')[$tarr[4]];
# add a leading 0 if the minute is only 1 digit.
$tarr[1] = '0' . $tarr[1] if length($tarr[1]) == 1;
# localtime returns the year as an offset from 1900; printing it raw gives
# "124" for 2024, so turn it into a full four-digit year.
$tarr[5] += 1900;
# print date in format "21:09, 4 Sep 1992"
print " -- $tarr[2]:$tarr[1], $tarr[3] $tarrmon $tarr[5]\n";
# print the command line as they entered it, so we know special options
print "% r2b$convertcommand\n";
print "%\n\n";
# Main loop: convert each input file in turn.
#
# For every file the line counter, the current-field marker and the error
# count start afresh.  Fields are gathered into %entry and handed to
# &doentry at each blank line and once more at end of file.  After each
# file a summary (entry counts by type, error count) goes to STDERR unless
# -qq was given.  The process exit status is the total error count.

# let 0 be a valid identifier, but we ignore it. EndNote Plus puts it
# out as a type identifier, but it's often wrong. We'll figure it out.
# $allindents = "ABCDEGHIJKLMNOPQRSTUVXYZ0l*$";
# organize these in likelihood order and get some speed improvement.
# A: 21%, DT: 11.3%, P: 10%, K: 8.7%, V: 7.1%, J: 6.3%, ICB: 4.0%
# Single-quoted: a double-quoted string that ends in a bare $ is a fatal
# "Final $ should be \$ or $name" error under Perl 5.
$allindents = 'ADTPKVJICBNESL0XRO*HGYMQUZl$';

foreach $infile (@files) {
  open (IN, $infile) || ((warn "Can't open $infile: $!\n"), next);
  $linenum = 0;
  $lastfield = 0;
  $errors = 0;
  if ($deroffonly) {
    # -deroff-only: just run the character conversions over each line.
    while (<IN>) {
      s/\n$//; # unlike chop, keeps the last character of an unterminated final line
      $linenum++;
      $_ = &doibmtoroff($_) if $ibmconv;
      $_ = &dorofftotex($_) if $roffconv;
      print $_, "\n";
    }
    next; # carry on with the remaining files rather than exiting after the first
  }
  while (<IN>) {
    s/\n$//; # unlike chop, keeps the last character of an unterminated final line
    $linenum++;
    # A blank line ends the entry being collected, if there is one.
    /^\s*$/ && do {
      if ($lastfield) {
        &doentry();
        undef(%entry);
        undef($lastfield);
      }
      next;
    };
    # A line that does not start with % continues the previous field.
    # Abstracts (X) and contents (Y) keep their line breaks.
    /^[^%]/ && do {
      if ($lastfield) {
        if ( ($lastfield eq 'X') || ($lastfield eq 'Y') ) {
          $entry{$lastfield} .= "\n" . $_;
        } else {
          $entry{$lastfield} .= " " . $_;
        }
      } else {
        print STDERR "line $linenum:";
        print STDERR "Line without field identifier: \n$_\n";
        $errors++;
      }
      next;
    };
    $lastfield = 0;
    if (substr($_, 3, 1) eq '#') { next; } # comment
    $field = substr($_, 1, 1);
    # Convert some lowercase fields to O. Bibtex really doesn't have
    # any way of dealing with tib's lower case ``translated'' fields.
    # Most of the lower case fields in my experience are usually typos.
    # Abstracts, contents, and comments seem to be non-standard. I have
    # assumed %X for abstract and %Y for contents.
    # Refer, tib, and bib seem to have their own styles, and usually
    # people add on fields anyway. This is reaching the limits of what I
    # can handle even by hand-translating.
    ($field =~ /^[or]$/) && do { $field = "O"; };
    $lastfield = $field;
    if ($field eq '%') { next; } # comment
    $rest = substr($_, 3);
    # A %\ line is passed straight through as a BibTeX @preamble.
    if ($field eq "\\") {
      print q/@preamble{ "/, substr($_, 1), qq/" }\n/;
      next;
    }
    # Repeated name fields are chained with " and "; a repeated abstract
    # starts a new paragraph; everything else is joined with a space.
    if ( ($field eq 'A') || ($field eq 'Q') || ($field eq 'E') ) {
      $entry{$field} .= " and " . $rest;
    } elsif ( ($field eq 'X') ) {
      $entry{$field} .= "\n\n" . $rest;
    } else {
      $entry{$field} .= " " . $rest;
    }
    if (index($allindents, $field) == -1) {
      &anerror("Unknown field identifier: $_");
    }
  }
  # Flush an entry that was not followed by a blank line, and clear it so
  # its fields cannot leak into the first entry of the next file.
  if (%entry) {
    &doentry();
    undef(%entry);
  }
  # %number keeps counting across files, so rebuild the total from it each
  # time; adding it onto the previous file's total would double count.
  $totalentries = 0;
  foreach $type (sort keys(%number)) {
    ($nowarnings < 2) && printf STDERR "%5d %s\n", $number{$type}, $type;
    $totalentries += $number{$type};
  }
  if ($nowarnings < 2) {
    print STDERR "$totalentries entries, ";
    $errors == 0 ? print STDERR "no error" : print STDERR "$errors error";
    $errors == 1 ? print STDERR "\n" : print STDERR "s\n";
  }
  $toterrors += $errors;
}
# An exit status is taken modulo 256, so clamp it: 256 errors must not be
# reported as success.
exit ($toterrors > 255 ? 255 : $toterrors);
##########################################
#
# doentry: turn the refer record collected in %entry into one BibTeX entry
# and print it on standard output.
#
# Takes no arguments; it works on the global %entry, which it modifies
# heavily (fields are cleaned, moved between keys and new keys such as Yr,
# Mo, Ed, Type, Ins and Sch are added).  The steps are:
#   1. clean and convert every field, truncating over-long ones;
#   2. move misplaced information (thesis, tech report, number, edition,
#      ISBN/ISSN, pages, date) to the field it belongs in;
#   3. parse the date and the names, and generate the cite key;
#   4. guess the BibTeX entry type from the fields present;
#   5. check for required fields, then build and print the entry.
# Warnings queued through &anerror are reported by &printerrors just before
# the entry is printed.  Also updates %number (entries seen, by type) and
# leaves the entry type in $type and the cite key in $key.
sub doentry {
  # do some processing on each field
  foreach $field (keys(%entry)) {
    $entry{$field} =~ s/^\s+//;
    $entry{$field} =~ s/\s+$//;
    $entry{$field} = &doibmtoroff($entry{$field}) if $ibmconv;
    $entry{$field} = &dorofftotex($entry{$field}) if $roffconv;
    if (length($entry{$field}) > $maxflength) {
      $entry{$field} = substr($entry{$field}, 0, $maxflength-3);
      $entry{$field} .= "...";
      &anerror("field %$field longer than $maxflength characters.");
    }
    #$entry{$field} =~ s/(^|[^\\])~/$1\\ /g; # ties (~) to literal space (\ )
  }
  # Because the refer format does not have fields set aside for such things
  # as edition, ISBN, ISSN, look for them in other fields. Also, some people
  # often put things like pages, techreport, and other information in the
  # wrong field. Once again, look for them and move them to the correct one.
  # Look for Thesis or Dissertation in O and move to R
  if ($entry{O}) {
    $_ = $entry{O};
    if ( (!$entry{R}) && ( (/thesis/i) || /dissert/i) ) {
      $entry{R} = $entry{O};
      delete $entry{O};
    }
  }
  # Look for "Tech* Rep*" in S and move to R
  if ( ($entry{S} =~ /tech\w*\s+rep\w*/i) && (!$entry{R}) ) {
    $entry{R} = $entry{S};
    delete $entry{S};
  }
  # Look for "* No. *" in V and move to N
  if (($entry{V} =~ /(\d+)\s+(no\.?|numb?e?r?\.?)\s+(\d+)/i) && (!$entry{N})){
    $entry{N} = $3;
    $entry{V} =~ s/(\d+)\s+(no\.?|numb?e?r?\.?)\s+(\d+)/$1/i;
  }
  # Look for "* Edition" in some fields and move to Ed field
  foreach $field ('O','R','S','V','T','B') {
    if ($entry{$field} =~ /([\w\d]+)\s+edition/i) {
      $entry{Ed} = $1;
      $entry{$field} =~ s/\s*[-,;(]?\s*([\w\d]+)\s+edition\s*[),;]?\s*//i;
      if ($entry{$field} =~ /^\s*$/) {
        delete $entry{$field};
      }
    }
  }
  # Look for ISBN/ISSN # in some fields and move to ISBN/ISSN
  foreach $field ('G','O') {
    if ($entry{$field} =~ /IS[BS]N/) {
      # "\ " becomes a tie while we look, so a number written with
      # unpaddable spaces is picked up as a single token.
      $entry{$field} =~ s/\\ /~/g;
      if ($entry{$field} =~ /ISBN\s*:?\s*(\d\S*)/i) {
        $entry{ISBN} = $1;
        $entry{ISBN} =~ s/[;.,]$//g;
        $entry{ISBN} =~ s/~/-/g;
        $entry{$field} =~ s/\s*[,;]?\s*ISBN\s*:?\s*(\d\S*)\s*[,;]?//i;
      }
      if ($entry{$field} =~ /ISSN\s*:?\s*(\d\S*)/i) {
        $entry{ISSN} = $1;
        $entry{ISSN} =~ s/[;.,]$//g;
        $entry{ISSN} =~ s/~/-/g;
        $entry{$field} =~ s/\s*[,;]?\s*ISSN\s*:?\s*(\d\S*)\s*[,;]?//i;
      }
      $entry{$field} =~ s/(^|[^\\])~/$1\\ /g;
    }
  }
  # look for pp or pages in O and move to P
  if ( (!$entry{P}) && ($entry{O} =~ /[XIVxiv]*\+?(\d+)\s*(pp\.?|pages),?/i) ) {
    $entry{P} = $1;
    $entry{O} =~ s/\s*[,;]?\s*[XIVxiv]*\+?(\d+)\s*(pp\.?|pages),?\s*//i;
  }
  # look for date in B if there is no D field
  if ( (!$entry{D}) && ($entry{B}) ) {
    if ($entry{B} =~ /\b(\d\d\d\d)\b/) {
      $entry{D} = $1;
    } elsif ($entry{B} =~ /'(\d\d)\b/) {
      $entry{D} = $1;
    }
  }
  # pick out reptype and repnumber
  # undef is a unary operator, so each variable is cleared separately;
  # otherwise a value can survive from the previous entry.
  undef $reptype;
  undef $repnumber;
  if ($entry{R}) {
    ($reptype, $repnumber) = $entry{R} =~ /(.+)\s+(\S+)$/;
    if ($repnumber !~ /\d/) {
      $reptype = $entry{R};
      undef $repnumber;
    }
  }
  # titles: cap-protect = 0, leave them alone.
  # cap-protect = 1, protect multi-cap sequences, and singles. (default)
  # cap-protect = 2, protect all capitals.
  if ($entry{T}) {
    if ($capprotect == 1) {
      1 while $entry{T} =~
        s/([^{\\\w]|^)([A-Z]+)([^{}\\\w]|$)/$1{$2}$3/g;
      $entry{T} =~ s/^{([A-Z])}/$1/;
    } elsif ($capprotect == 2) {
      $entry{T} =~ s/([A-Z]+)/{$1}/g;
    }
  }
  # set date fields
  &parsedate();
  # convert names to BiBTeX format as best we can
  # Start from a clean slate so an entry without %A does not inherit the
  # editor / corporate-author verdict reached for the previous entry.
  $aeditors = $acauthor = 0;
  if ($entry{A}) {
    $entry{Key_A} = &parsename($entry{A}, 'A');
    $entry{A} = $fname;
    $aeditors = $editors;
    $acauthor = $corpauthors;
  }
  if ($entry{E}) {
    $entry{Key_E} = &parsename($entry{E}, 'E');
    $entry{E} = $fname;
  } elsif ($aeditors) {
    # the %A field really held editors
    $entry{E} = $entry{A};
    $entry{Key_E} = $entry{Key_A};
    delete $entry{Key_A};
    delete $entry{A};
  }
  if ($entry{Q} || $entry{I}) {
    if ($entry{Q}) {
      $entry{Q} =~ s/^and //;
      $entry{Q} =~ s/\s+/ /g;
      ($entry{Key_Q}) = split(/[\s~]/, $entry{Q});
    } else {
      ($entry{Key_I}) = split(/[\s~]/, $entry{I});
    }
    if ($acauthor) {
      ($entry{Key_A}) = split(/[\s~]/, $entry{A});
      $entry{Key_A} =~ s/^{([^}]*)}?.*$/$1/;
    }
  } elsif ($acauthor) {
    # the %A field really held a corporate author
    $entry{Q} = $entry{A};
    ($entry{Key_Q}) = split(/[\s~]/, $entry{Q});
    $entry{Key_Q} =~ s/^{([^}]*)}?.*$/$1/;
    delete $entry{Key_A};
    delete $entry{A};
  }
  # set or generate key
  &genkey();
  # determine the Entry Type
  # This is where the heuristics come into play. We need to examine what
  # fields we were given, and sometimes examine the field contents, to
  # determine what type of entry this is.
  if ($entry{J} && !$entry{B}) {
    $type = 'article';
    $_ = $entry{J};
    if (/^proc\w*\.\s/i || /proceeding/i || /proc[.]?\s+of\s/i ||
        /conference/i || /symposium/i || /workshop/i ) {
      $type = 'inproceedings';
      $entry{B} = $entry{J};
      if ($entry{N}) { # These should be %B Proc, %J Journal, but do anyway.
        # Hope they did "proceedings of ..., published as ..."
        if (/^(.*)published\s+(in|as)\s+(.*)$/i) {
          $entry{B} = $1;
          $entry{J} = $3;
          $entry{B} =~ s/,?\s*$//;
        }
        &doentry_addother("Published as $entry{J}");
        if ($entry{V}) { $entry{O} .= ", volume $entry{V}"; }
        if ($entry{N}) { $entry{O} .= ", number $entry{N}"; }
        delete $entry{V};
        delete $entry{N};
      }
      delete $entry{J};
    }
  } elsif ($entry{B}) {
    $type = '';
    if ($entry{T}) {
      $type .= 'in';
    }
    $_ = $entry{B};
    if (/^proc\w*\.\s/i || /proceeding/i || /conference/i || /workshop/i) {
      $type .= 'proceedings';
    } else {
      $type .= 'collection';
    }
    if ($entry{J}) {
      &doentry_addother("Published as $entry{J}");
      if ($entry{V}) { $entry{O} .= ", volume $entry{V}"; }
      if ($entry{N}) { $entry{O} .= ", number $entry{N}"; }
      delete $entry{J};
      delete $entry{V};
      delete $entry{N};
    }
  } elsif ($entry{R}) {
    $type = 'techreport';
    $_ = $reptype;
    s/^{\\[rbi][mft] //g; # just in case someone changed the font
    tr/A-Za-z//cd; # only A-z are left
    if (/^phd/i) {
      $type = 'phdthesis';
      $reptype = "Ph.{D}. Thesis";
    }
    if (/^diploma/i) {
      $type = 'phdthesis';
      $reptype = "Diploma Thesis";
    }
    if (/^master/i || /^m[as]thes/i) {
      $type = 'mastersthesis';
      $reptype = "Master's Thesis";
    }
    if (/^phd/i || /^master/i || /^m[as]thes/i || /^diploma/i) {
      if ($entry{R} =~ /thesis/i) {
        ($repnumber) = $entry{R} =~ /thesis\W*(.*)$/i;
      }
      if ($entry{R} =~ /dissert/i) {
        $reptype =~ s/Thesis/Dissertation/;
        ($repnumber) = $entry{R} =~ /dissert\w*\W*(.*)$/i;
      }
    }
    /^draft/i && ($type = 'unpublished');
    /^unpublish/i && ($type = 'unpublished');
    if (!$entry{N}) {
      $entry{N} = $repnumber;
    }
    $entry{Type} = $reptype;
    undef $reptype;
    undef $repnumber;
    if ( (!$entry{Q}) && ($entry{I}) ) {
      $entry{Q} = $entry{I};
      delete $entry{I};
    }
  } elsif ($entry{I}) {
    $type = 'book';
  } else {
    $type = 'misc';
  }
  # BibTeX has no collection type, sigh. We change 'collection' to 'book'.
  if ($type eq 'collection') {
    $type = 'book';
  }
  $number{$type}++;
  # if we have an institution but no author, the Inst. is the author
  if ( ($entry{Q}) && (!$entry{A}) ) {
    $entry{A} = "{" . $entry{Q} . "}";
  }
  # if there is no address, but a "header" field, assume H stands for "held in"
  if ( ($entry{H}) && (!$entry{C}) ) {
    $entry{C} = $entry{H};
    delete $entry{H};
  }
  # set institution to be the corporate author unless it's Anonymous
  if ($entry{Q} !~ /^anon\.?\w*$/i) {
    $entry{Ins} = $entry{Q};
  }
  # if we have a reptype and number, but no "Type" entry, move to other.
  if ($reptype) {
    &doentry_addother($entry{R});
    delete $entry{R};
  }
  # Change things around for each types
  $_ = $type;
  /^mastersthesis/ && do { $entry{Sch} = $entry{Ins}; delete $entry{Ins}; };
  /^phdthesis/ && do { $entry{Sch} = $entry{Ins}; delete $entry{Ins}; };
  /^unpublished/ && do { &doentry_addother($entry{Ins}); delete $entry{Ins}; };
  # Syntax checking
  /^article/ && (&syntax('A', 'T', 'J', 'Yr'));
  /^book/ && (&syntax('AE', 'T', 'I', 'Yr'));
  /^incollection/ && (&syntax('A', 'T', 'B', 'I', 'Yr'));
  /^inproceedings/ && (&syntax('A', 'T', 'B', 'Yr'));
  /^mastersthesis/ && (&syntax('A', 'T', 'Sch', 'Yr'));
  /^phdthesis/ && (&syntax('A', 'T', 'Sch', 'Yr'));
  /^proceedings/ && (&syntax('BT', 'Yr'));
  /^techreport/ && (&syntax('A', 'T', 'Ins', 'Yr'));
  /^unpublished/ && (&syntax('A', 'T', 'O'));
  # set up the entry output string
  $ent = '';
  # The @ is kept out of the double-quoted string: "@$type" there is an
  # array dereference under Perl 5 and would interpolate as nothing.
  $ent .= '@' . "$type\{$key,\n";
  if ($entry{Key}) { $ent .= " key = \{$entry{Key}\},\n"; }
  if ($entry{A}) { $ent .= " author = \{$entry{A}\},\n"; }
  if ($entry{E}) { $ent .= " editor = \{$entry{E}\},\n"; }
  if ($entry{T}) { $ent .= " title = \{$entry{T}\},\n"; }
  if ($entry{B}) {
    if ($entry{T}) {
      $ent .= " booktitle = \{$entry{B}\},\n";
    } else {
      $ent .= " title = \{$entry{B}\},\n";
    }
  }
  if ($entry{Ins}) { $ent .= " institution = \{$entry{Ins}\},\n"; }
  if ($entry{Sch}) { $ent .= " school = \{$entry{Sch}\},\n"; }
  if ($entry{J}) { $ent .= " journal = \{$entry{J}\},\n"; }
  if ($entry{Type}){ $ent .= " type = \{$entry{Type}\},\n"; }
  if ($entry{S}) { $ent .= " series = \{$entry{S}\},\n"; }
  if ($entry{V}) { $ent .= " volume = \{$entry{V}\},\n"; }
  if ($entry{N}) { $ent .= " number = \{$entry{N}\},\n"; }
  if ($entry{Ed}) { $ent .= " edition = \{$entry{Ed}\},\n"; }
  if ($entry{P}) { $ent .= " pages = \{$entry{P}\},\n"; }
  if ($entry{I}) { $ent .= " publisher = \{$entry{I}\},\n"; }
  if ($entry{C}) { $ent .= " address = \{$entry{C}\},\n"; }
  # since we allow abbrevs for month, don't print {}s
  if ($entry{Mo}) { $ent .= " month = $entry{Mo},\n"; }
  if ($entry{Yr}) { $ent .= " year = \{$entry{Yr}\},\n"; }
  if ($entry{'$'}) { $ent .= " price = \{$entry{'$'}\},\n"; }
  if ($entry{'*'}) { $ent .= " copyright = \{$entry{'*'}\},\n"; }
  if ($entry{K}) { $ent .= " keywords = \{$entry{K}\},\n"; }
  if ($entry{M}) { $ent .= " mrnumber = \{$entry{M}\},\n"; }
  if ($entry{l}) { $ent .= " language = \{$entry{l}\},\n"; }
  if ($entry{U}) { $ent .= " annote = \{$entry{U}\},\n"; }
  if ($entry{ISBN}){ $ent .= " ISBN = \{$entry{ISBN}\},\n"; }
  if ($entry{ISSN}){ $ent .= " ISSN = \{$entry{ISSN}\},\n"; }
  if ($entry{X}) { $ent .= " abstract = \{$entry{X}\},\n"; }
  # NOTE(review): G, H, O and Z each produce their own "note" field, so an
  # entry can carry several; confirm how the consuming BibTeX handles that.
  if ($entry{G}) { $ent .= " note = \{$entry{G}\},\n"; }
  if ($entry{H}) { $ent .= " note = \{$entry{H}\},\n"; }
  if ($entry{O}) { $ent .= " note = \{$entry{O}\},\n"; }
  if ($entry{Z}) { $ent .= " note = \{$entry{Z}\},\n"; }
  if ($entry{Y}) {
    if (!$prcontents) { $entry{Y} = "(not listed)"; }
    $ent .= " contents = \{$entry{Y}\},\n";
  }
  # drop the comma that follows the last field
  substr($ent, -2, 1) = '';
  $ent .= "\}\n\n";
  &printerrors();
  print $ent;
}

# doentry_addother: private helper for doentry.  Appends TEXT to the %O
# (other) field, putting "; " between it and anything already there so the
# two pieces do not run together.  Empty TEXT is ignored.
sub doentry_addother {
  local($text) = @_;
  return unless length($text);
  $entry{O} .= "; " if length($entry{O});
  $entry{O} .= $text;
}
##########################################
#
# date looks like                   month                dec  year
# --------------------------------  -------------------  ---  ---------------
# 1984                                                   84   1984
# 1974-1975                                              74   1974-1975
# August 1984                       aug                  84   1984
# May 1984 May 1984                 may                  84   1984
# 1976 November                     nov                  76   1976
# 1976 November 1976                nov                  76   1976
# 21 August 1984                    {21 August}          84   1984
# August 18-21, 1984                {August 18-21}       84   1984
# 18-21 August 1991                 {18-21 August}       91   1991
# July 31-August 4, 1984 1984       {July 31-August 4}   84   1984
# July-August 1980                  {July-August}        80   1980
# February 1984 (revised May 1991)  feb                  84   1984
# Winter 1990                       {Winter}             90   1990
# 1988 (in press)                                        88   1988 (in press)
# to appear                                              ??   to appear
#
# parsedate: split the %D field of the current entry into BibTeX date parts.
#
# Reads $entry{D}.  Sets $entry{Yr} (year, or the whole date when no year
# can be found), $entry{Mo} (a lower-case three-letter month abbreviation,
# or the remaining text in braces), $entry{YrKey} (the year used for the
# cite key, "????" if unknown) and $entry{Decade} (characters 3-4 of YrKey).
# When something is left over after the year is removed, it is copied to
# the global $_.
sub parsedate {
  local($date) = $entry{D};
  # Surrounding whitespace was already removed when the fields were cleaned.
  $date =~ s/(\S+)\s+(\d+)\s+\1\s+\2/$1 $2/; # handle duplicate dates
  $date =~ s/^\s*(\d\d\d+)\s+(\S+)/$2 $1/; # handle 1976 November
  # Peel years (or year ranges) off the end until none are left; the one
  # peeled last, i.e. the leftmost, is the one that is kept.
  while ($date =~ /\s*[(]?((\d\d\d\d[-\/])?\d\d\d\d)[).]?\s*(\(.*\))?$/) {
    $entry{Yr} = $1;
    $date =~ s/,?\s*[(]?(\d\d\d\d[-\/])?\d\d\d\d[).]?\s*(\(.*\))?$//;
  }
  # Year for the key: the parsed year, else any four digits, else "????".
  $entry{YrKey} = $entry{Yr} || ($date =~ /(\d\d\d\d)/ ? $1 : "????");
  $entry{Decade} = substr($entry{YrKey}, 2, 2);
  return if length($date) == 0;
  $_ = $date;
  # Only text free of digits and hyphens can be a plain month name.
  if (!/[-\d]/ && /^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i) {
    ($entry{Mo} = $1) =~ tr/A-Z/a-z/;
  }
  if (!$entry{Mo}) {
    if ($entry{Yr}) {
      # not a month we know: pass the text through as a literal
      $entry{Mo} = '{' . $date . '}';
    } else {
      # no year either: hand the whole date over as the year
      $entry{Yr} = $entry{D};
    }
  }
  $entry{Decade} = substr($entry{YrKey}, 2, 2);
}
##########################################
# key is Author's last name followed by last 2 digits of year.
# In the corporate author's case, the key is the first word of the name
# followed by the last 2 digits of the year.
# order is L, A, Q, E, I, "Anonymous"
# In case of conflict, ascending letters are added to the end
# Perl knows that "z"+1 == "aa" and "az"+1 == "ba". Uskomatonta!
#
# BiBTeX's cite keys are case-INsensitive. We want to keep the
# pretty looking capitalization though, so we modify key and lkey.
# We now check Label fields for duplicate keys
# genkey: choose the cite key for the current entry and leave it in $key.
#
# The key is the %L label when there is one (and -ignorelabel is off);
# otherwise the first available of Key_A, Key_Q, Key_E, Key_I, the argument
# and "Anonymous", cut to $maxllength characters, followed by the decade.
# Keys are compared in lower case (left in $lkey); a key that is already in
# %allkeys gets a letter suffix a, b, ... z, aa, ... until it is unique.
#
# With a true argument the key is returned and NOT recorded in %allkeys.
# Otherwise it is recorded, and $entry{Key} is set when the name had to
# fall back to "Anonymous".
sub genkey {
  local($noadd) = @_;
  local($who, $stem, $suffix);
  if (!$entry{L} || $ignorelabel) {
    $who = $entry{Key_A} || $entry{Key_Q} || $entry{Key_E}
        || $entry{Key_I} || $noadd || "Anonymous";
    $who = sprintf("%.${maxllength}s", $who);
    $key = $who . $entry{Decade};
  } else {
    $key = $entry{L};
  }
  $key =~ s/,//g;
  $stem = $key;
  ($lkey = $key) =~ tr/A-Z/a-z/; # citekeys are case-insensitive
  if ($allkeys{$lkey}) {
    # Perl's string increment takes the suffix from "z" on to "aa".
    for ($suffix = 'a'; ; $suffix++) {
      $key = $stem . $suffix;
      ($lkey = $key) =~ tr/A-Z/a-z/;
      last unless $allkeys{$lkey};
    }
  }
  return($key) if $noadd;
  $allkeys{$lkey} = $key;
  if ($who eq "Anonymous") {
    $entry{Key} = $key;
  }
}
##########################################
# parsename parses names into BiBTeX format
#
# This uses heuristics to parse a name into First, von, Last, and Jr
# parts. It handles multiple names (John doe, jane doe) on a line.
# It does not handle names in "last, first" format.
# it returns a key (last name of author or editor, first name of corp).
# It sets $fname to the full bibtex name.
# It sets $editors, $authors, or $corpauthors if it thinks the name is one.
#
# parsename: convert a refer name field into BibTeX name syntax.
#
# Arguments:
#   $allnames - the field text; several names are separated by " and "
#   $ntype    - which field it came from: 'A', 'E' or 'Q' (only used to
#               pick the warning that is issued)
# Returns the string to build the cite key from: the last name of the first
# person, or the first word of a corporate name.
# Side effects:
#   $fname       - set to the full name list in BibTeX form
#   $editors     - set to 1 if a name carried an "ed."/"(editors)" marker
#   $corpauthors - set to 1 if the field looks like a corporate author
#   $authors     - reset to 0
#   $name        - used as scratch space and NOT localized
# With -noname-conv ($nameconv false) the text is passed through unchanged
# and the first word is returned as the key.
sub parsename {
  local($allnames, $ntype) = @_;
  local($firstn, $vonn, $lastn, $jrn);
  local(@names, $keyn, $oname, $nname, $rest);
  undef $fname;
  $editors = $authors = $corpauthors = 0;
  # handle unpaddable spaces (\ ) in names as if they were ties (\0)
  $allnames =~ s/\\ /~/g; # the ties (~) get converted back later.
  $allnames =~ s/\s+/ /g;
  $allnames =~ s/^and //;
  $allnames =~ s/^and$//;
  # a single word that isn't "anonymous" is taken to be a corporate author
  if ( ($allnames !~ /\s/) && ($allnames !~ /anonymous/i) ){
    $corpauthors = 1;
  }
  @names = split(/ and /, $allnames);
  if (!$nameconv) {
    $fname = $allnames;
    $_ = shift @names;
    # if we're leaving names alone, they're probably already in "Last, First"
    # format, so use the first part of the name as the key.
    ($name) = /^\s*(\S*)/;
    # ($name) = /(\S*)\s*$/;
    $name =~ tr/A-Za-z0-9\/\-//cd;
    return $name;
  }
  while (@names) {
    $oname = $name = shift @names;
    $firstn = $vonn = $lastn = '';
    # -reverse-author: turn "Last, First [Jr]" into "First Last, Jr"
    if ( $revauthor && ($ntype eq 'A') && ($name =~ /,/) ) {
      $jrn = "";
      if ($name =~ s/[,\s]+([sj]r\.?|I+)\s*$//i) {
        $jrn = ", " . $1;
      }
      $name =~ s/^(.*)\s*,\s*(.*)/$2 $1$jrn/g;
    }
    $jrn = "";
    # make sure a trailing Jr/Sr/III/ed. marker is set off by a comma
    $name =~ s/[\s~]+([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)(,|$)/, $1/i;
    $name =~ s/,,/,/g;
    ($nname, $jrn) = split(/,[^~]/, $name, 2);
    # print "name: $name -> $nname : $jrn\n";
    $nname =~ s/\s+$//;
    $jrn =~ s/^[\s~]+//;
    $jrn =~ s/,$//;
    # anything after the marker is another name: push it back on the list
    if ($jrn =~ /\s/) {
      ($jrn, $rest) = $jrn =~ /([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)?,?\s*(.*)$/i;
      unshift(@names, $rest);
    }
    $jrn =~ s/([^\\])~/$1 /g;
    # everything up to the last space is the first name, the rest the last
    ($firstn) = $nname =~ /^((\S* )*)/;
    $nname = substr($nname, length($firstn));
    $lastn = $nname;
    $lastn =~ s/([^\\])~/$1 /g;
    $firstn =~ s/([^\\])~/$1 /g;
    # lower-case words next to the last name form the "von" part
    while ($firstn =~ / ([a-z]+ )$/) {
      $rest = $1;
      $vonn = $rest . $vonn;
      $firstn = substr($firstn, 0, length($firstn) - length($rest));
    }
    while ($lastn =~ /^([a-z]+ )/) {
      $rest = $1;
      $vonn .= $rest;
      $lastn = substr($lastn, length($rest));
    }
    if ($jrn) {
      # The alternation is grouped so both anchors apply to both branches.
      if ($jrn =~ /^(et\.?\s*al\.?|others)$/i) {
        undef $jrn;
        unshift(@names, "others");
      }
      if ($jrn =~ /^[(]?edi?t?o?r?s?[\.]?[)]?$/i) {
        undef $jrn;
        $editors = 1;
      }
      if ($jrn =~ /^inc[\.]?$/i) {
        $lastn .= ", " . $jrn;
        undef $jrn;
        $corpauthors = 1;
      }
    }
    # Match the whole last name only: without the grouping, surnames such
    # as "Carothers" or "Etalle" were rewritten to "others".
    if ($lastn =~ /^(et\.?\s*al\.?|others)$/i) {
      $lastn = "others";
    }
    if ($lastn =~ /\s/) {
      $lastn = "{" . $lastn . "}";
    }
    # the key comes from the first name processed
    if (!$keyn) {
      if ($corpauthors) {
        ($keyn) = $lastn =~ /^(\S+)/;
      } else {
        ($keyn) = $lastn; # =~ /(\S+)$/; # if you want last of Last
      }
      $keyn =~ tr/A-Za-z0-9\/\-//cd;
    }
    if ($jrn) {
      $fname .= " and " . $vonn . $lastn . ", " . $jrn . ", " . $firstn;
    } else {
      $fname .= " and " . $firstn . $vonn . $lastn;
    }
  }
  $fname =~ s/^ and\s+//;
  $fname =~ s/\s+$//;
  $fname =~ s/\s+/ /g;
  # warn when the contents do not fit the field they were found in
  if ($ntype eq 'A') {
    if ($corpauthors) {
      &anerror("Corporate Author (%Q) in %A.");
    } elsif ($editors) {
      &anerror("Editors (%E) in %A.");
    }
  } elsif ($ntype eq 'Q') {
    if ($editors) {
      &anerror("Editors (%E) in %Q.");
    }
  } elsif ($ntype eq 'E') {
    if ($corpauthors && (!$entry{A})) {
      &anerror("Corporate Author (%Q) in %E.");
    }
  }
  return $keyn;
}
##########################################
# syntax does syntax checking
#
# syntax: check that the current entry has the fields its type requires.
#
# Each argument is a key of %entry that must hold a true value.  Two
# pseudo-keys stand for alternatives: 'AE' is satisfied by either A or E,
# and 'BT' by either B or T.  Every unmet requirement is queued as an
# error through &anerror.
sub syntax {
  foreach $wanted (@_) {
    if ($wanted eq 'AE') {
      &anerror("Missing A and E (Author and Editor) fields.")
        unless $entry{A} || $entry{E};
    } elsif ($wanted eq 'BT') {
      &anerror("Missing T (Title) field.")
        unless $entry{B} || $entry{T};
    } elsif (!$entry{$wanted}) {
      &anerror("Missing $wanted field.");
    }
  }
}
##########################################
# stores error information until it gets printed
#
# This allows us to fully process the entry so we can print out
# valid key information without having to go through ugly gyrations.
#
# anerror: queue one error message (the first argument) on @errorstring
# for &printerrors to report later, and count it in $errors.
sub anerror {
  push(@errorstring, $_[0]);
  $errors++;
}
##########################################
# prints out stored error information
#
# printerrors: report the errors queued by &anerror for the current entry.
#
# Unless warnings are turned off ($nowarnings), every queued message is
# written to STDERR as "<key> (<line>): <message>", with the key padded to
# $maxllength columns and <line> taken from $errline.  The queue is then
# emptied and $errline is moved to the line after the current one.
sub printerrors {
  local($width, $report);
  unless ($nowarnings || !@errorstring) {
    $width = $maxllength; # a little short, but most labels aren't this long
    foreach $msg (@errorstring) {
      $report .= sprintf("%-${width}s (%5d): %s\n", $key, $errline, $msg);
    }
    print STDERR $report;
  }
  undef @errorstring;
  $errline = $linenum+1;
}
##########################################
# converts *roff characters to TeX characters
#
# If anyone has any corrections or additions, I'd be happy to see them.
#
# Is there a better way to do this? (i.e. eval)
#
sub dorofftotex {
local($_) = @_;
local($fbraces, $nchanges);
study; # presumably this will help us.
# tib: refer format, TeX formatting.
# This should probably be set up to read a configuration file into
# a variable then use eval. If there is such a beast as a "detibify"
# program, then this won't be necessary.
if ($tibfmt) {
1 while s#\\egroup(.*)\\bgroup#{\\Reffont $1}#g;
s/\\Citefont//g;
s/\\ACitefont//g;
s/\\Authfont//g;
s/\\Titlefont//g;
s/\\Tomefont/\\sl/g;
s/\\Volfont//g;
s/\\Flagfont//g;
s/\\Reffont/\\rm/g;
s/\\Smallcapsfont/\\sevenrm/g;
s/\\Flagstyle//g; # This should be smarter
if (/\|/) {
s/\|JAN\|/January/g; # yes, the parsedate routine can handle
s/\|FEB\|/February/g; # these most of the time, but sometimes
s/\|MAR\|/March/g; # they're put in the middle of non-date
s/\|APR\|/April/g; # strings, so we'd better convert them.
s/\|MAY\|/May/g;
s/\|JUN\|/June/g;
s/\|JUL\|/July/g;
s/\|AUG\|/August/g;
s/\|SEP\|/September/g;
s/\|OCT\|/October/g;
s/\|NOV\|/November/g;
s/\|DEC\|/December/g;
# My example of tib format is AGbib from INRIA, so this is set up to
# handle the common cases for that bibliography.
s/\|UNIV\|/University/g;
s/\|DEPT\|/Department/g;
s/\|DCS\|/Department of Computer Science/g;
s/\|PCS\|/Progr. and Computer Science/g;
s/\|CSD\|/Computer Science Department/g;
s/\|TR\|/Technical Report/g;
s/\|COMPJ\|/The Computer Journal/g;
s/\|JACM\|/Journal of the ACM/g;
s/\|CACM\|/Communications of the ACM/g;
s/\|SGPLN\|/Sigplan Notices/g;
s/\|SIAJC1\|/SIAM Journal on Computing/g;
s/\|ACTAI2\|/Acta Informatica/g;
s/\|IEETS1\|/IEEE Transactions on Software Engineering/g;
s/\|INFPL2\|/Information Processing Letters/g;
if (s/\|LNCS\|/Lecture Notes in Computer Science/g) {
$entry{I} .= " " . "Springer-Verlag";
$entry{C} .= " " . "New York--Heidelberg--Berlin"; }
if (s/\|IFBSV\|/Inf. Fachb./g) {
$entry{I} .= " " . "Springer-Verlag";
$entry{C} .= " " . "New York--Heidelberg--Berlin"; }
s/\|SCICP\|/Science of Computer Programming/g;
s/\|SP&E\|/Software---Practice and Experience/g;
s/\|POPL\|/ACM Symp. on Principles of Progr. Languages/g;
s/\|TOPLAS\|/ACM Trans. Progr. Languages and Systems/g;
if (s/\|Addison\|/Addison Wesley/g) {
$entry{C} .= " " . "Reading, MA"; }
if (s/\|PrHall\|/Prentice Hall/g) {
$entry{C} .= " " . "Englewood Cliffs, NJ"; }
if (s/\|NHoll\|/North-Holland/g) {
$entry{C} .= " " . "Amsterdam"; }
if (s/\|Cambridge\|/Cambridge University Press/g) {
$entry{C} .= " " . "New York"; }
if (s/\|Springer\|/Springer-Verlag/g) {
$entry{C} .= " " . "New York--Heidelberg--Berlin"; }
s/\|TWEINF\|/Onderafdeling der Informatica, Tech. Hogeschool Twente/g;
s/\|TUMINF\|/Institut f{\"u}r Informatik, Tech. University M{\"u}nchen/g;
s/\|HELDCS\|/Department of Computer Science, University of Helsinki/g;
if (s/\|IBMTJW\|/IBM T.J. Watson Research Center/g) {
$entry{C} .= " " . "Yorktown Heights, NY"; }
if (s/\|INRIA\|/INRIA/g) {
$entry{C} .= " " . "Rocquencourt"; }
if (s/\|IRIAL\|/IRIA-Laboria/g) {
$entry{C} .= " " . "Rocquencourt"; }
$entry{C} =~ s/^\s+//;
$entry{I} =~ s/^\s+//;
}
return $_;
}
s#_#_U#g; # _ will be the escape character
# don't do troff character conversion if there aren't any backslashes
# in the string. Hopefully this will save a little work.
if (/\\/) {
# to make commands, we need command characters, but we don't want
# any of the command characters that they use to be passed through
# or we'll end up with invalid input. So, _ is the escape character.
# _U is _
# _B is a backslash
# _I is a literal backslash
# _S is a space
# _C is {\
# _L is {
# _R is }
# _l is <
# _g is >
# _T is ~
# _A is ^
# _D is $
# _M is $\
# _V is |
# _E is ${}^
# _H is \hbox{
# _h is \leavevmode
# _c is a special continuation character for long lines
# I'm not sure I quite get this -- refer strips off one \ for most
# characters it seems. But other times it doesn't. Argh! I'll
# go ahead and replace \\ with \ to handle this. It shouldn't ever
# come up that this is bad since \e and \(rs are a real backslashes.
s#\\\\#\\#g; # \\ -> \
# font changes
# if one uses \fP, everything is fine -- otherwise we need to get complex
$fbraces = 0;
$fbraces += s#\\f[1R]#_Crm_S#g; # \f1 -> {\rm
$fbraces += s#\\f[2I]#_Cit_S#g; # \f2 -> {\it
$fbraces += s#\\f[3B]#_Cbf_S#g; # \f3 -> {\bf
$fbraces -= s#\\fP#_R#g; # \fP -> }
while ($fbraces) { # too many {'s
if ($fbraces < 0) {
$nchanges = s#_R##;
&anerror("Used \\fP with no previous font.");
$fbraces += $nchanges;
} else { # Changed newline matching because 4.019 had problems
# $nchanges = s#(_Cit_S)([\s\S]*)_Crm_S#$1$2_R#;
$nchanges = s#(_Cit_S)((.|\n)*)_Crm_S#$1$2_R#;
if (!$nchanges)
{ $nchanges = s#(_Cbf_S)([\s\S]*)_Crm_S#$1$2_R#; }
if (!$nchanges)
{ $nchanges = s#(_C\w\w_S)([\s\S]*)_C\w\w_S#$1$2_R#; }
if (!$nchanges) {
$_ .= "_R"; # couldn't get it, so stick a } on
$fbraces--;
&anerror("Problems with font changing. Suggest using \\fP.");
}
$fbraces -= ($nchanges * 2);
}
}
# point size changes
# first, U\s-2NIX\s0 -> {\sc Unix}
s/\b([A-Z])\\s-[12]([A-Z]+)\\s0/_Csc_S$1\L$2\E_R/g;
# very similar to font changes. If \s0 is used, everything is fine.
$fbraces = 0;
$fbraces += s#\\s-1#_Csmall_S#g; # \s-1 -> {\small
$fbraces += s#\\s-2#_Cfootnotesize_S#g; # \s-2 -> {\footnotesize
$fbraces += s#\\s-3#_Cscriptsize_S#g; # \s-3 -> {\scriptsize
$fbraces += s#\\s-4#_Ctiny_S#g; # \s-4 -> {\tiny
$fbraces += s#\\s+1#_Clarge_S#g; # \s+1 -> {\large
$fbraces += s#\\s+2#_CLarge_S#g; # \s+2 -> {\Large
$fbraces += s#\\s+3#_CLARGE_S#g; # \s+3 -> {\LARGE
$fbraces += s#\\s+4#_Chuge_S#g; # \s+4 -> {\huge
$fbraces -= s#\\s0#_R#g; # \s0 -> }
while ($fbraces) { # too many {'s
if ($fbraces < 0) {
$nchanges = s#_R##;
&anerror("Used \\s0 with no previous point size change.");
$fbraces += $nchanges;
} else {
$nchanges = s#(_Csmall_S)(.*)_Clarge_S#$1$2_R#;
if (!$nchanges)
{ $nchanges = s#(_Cfootnotesize_S)(.*)_CLarge_S#$1$2_R#; }
if (!$nchanges)
{ $nchanges = s#(_Cscriptsize_S)(.*)_CLARGE_S#$1$2_R#; }
if (!$nchanges)
{ $nchanges = s#(_Clarge_S)(.*)_Csmall_S#$1$2_R#; }
if (!$nchanges)
{ $nchanges = s#(_CLarge_S)(.*)_Cfootnotesize_S#$1$2_R#; }
if (!$nchanges)
{ $nchanges = s#(_CLARGE_S)(.*)_Cscriptsize_S#$1$2_R#; }
if (!$nchanges) {
$_ .= "_R"; # last resort. Add an }.
$fbraces--;
&anerror("Problems with point size changing. Suggest using \\s0.");
}
$fbraces -= ($nchanges * 2);
}
}
# other troff special characters
# some of these aren't available as standard TeX, so I made up replacements.
# Perhaps they should be def'ed in a preamble and used that way, but I
# doubt most files use \(rg, \(ct, and such, so why waste resources.
# If you're really concerned about eth, thorn, yogh, or ogonek, go get
# the cmoer fonts -- they do the characters right.
# grab some common overstrikes made by people who don't have a real
# troff manual or implementation.
s#\\o'(\w)\\\(aa'#_C'$1_R#g; # \o'e\(aa' -> {\'e}
s#\\o'(\w)\\\(ga'#_C`$1_R#g; # \o'e\(ga' -> {\`e}
# Lots of bibliographies from Europe use \:o to mean \(o:, etc. Both
# Elan troff and groff don't know what this means, so I don't do the
# conversion by default. Use '-overstrike' to get this behaviour.
if ($overstrike) {
s#\\([:`'^~,v/o])([AEIOUYaeiouyNnCcSs])#\\\($2$1#g;
}
s#\\\(bu#_Mbullet_D#g; # \(bu -> $\bullet$
s#\\\(ci#_Mbigcirc_D#g; # \(ci -> $\bigcirc$
s#\\\(sq#_MBox_D#g; # \(sq -> $\Box$
s#\\\(ct#_h_Brm_Brlap/c_R#g; # \(ct -> \hbox{\rm\rlap/c}
s#\\\(rg#_h_Braise.6em_H_Booalign_L_L_Bmathhexbox20D_R_Bcrcr\n_Bhfil_Braise.07ex_Hr_R_Bhfil_R_R_R#g;
s#\\\(co#_h_Braise.6em_H_Bcopyright_R_R#g;
s#\\\(lh#_MLongleftarrow_D#g; # \(lh -> $\Longleftarrow$ #wrong!
s#\\\(rh#_MLongrightarrow_D#g; # \(rh -> $\Longrightarrow$
s#\\\(dg#_Bdag #g; # \(dg -> \dag
s#\\\(dd#_Bddag #g; # \(dd -> \ddag
s#\\\(sc#_BS #g; # \(sc -> \S
s#\\\(br#_D_V_D#g; # \(br -> $|$
s#\\\(fm#_E_Bprime_D#g; # \(fm -> ${}^\prime$
s#\\\(de#_E_Bcirc_D#g; # \(de -> ${}^\circ$
s#\\\(em#--#g; # \(em -> --
s#\\\(hy#-#g; # \(hy -> -
s#\\\(ru#_Cvrule width1.2ex height0.1ex depth0ex_R#g;
s#\\\(ul#_Cvrule width1.2ex height-.3ex depth.4ex_R#g;
s#\\\-#---#g; # \- -> --
s#\\\(aa#_C'_L _R_R#g; # \(aa -> {\'{ }}
s#\\'#_C'_L _R_R#g; # \' -> {\'{ }}
s#\\\(ga#_C`_L _R_R#g; # \(ga -> {\`{ }}
s#\\`#_C`_L _R_R#g; # \` -> {\`{ }}
s#\\\(sl#/#g; # \(sl -> /
s#\\e#_I#g; # \e -> $\backslash$
s#\\0#_T#g; # \0 -> ~
s#\\ #_B #g; # '\ ' -> '\ '
s#\\\^#_D_B,_D#g; # \^ -> $\,$
s#\\\|#_D_B:_D#g; # \| -> $\:$
s#\\\(fi#fi#g; # \(fi -> fi
s#\\\(fl#fl#g; # \(fl -> fl
s#\\\(ff#ff#g; # \(ff -> ff
s#\\\(Fi#ffi#g; # \(Fi -> ffi
s#\\\(Fl#ffl#g; # \(Fl -> ffl
s#\\\(pl#_D+_D#g; # \(pl -> $+$
s#\\\(mi#_D-_D#g; # \(mi -> $-$
s#\\\(mu#_Mtimes_D#g; # \(mu -> $\times$
s#\\\(di#_Mdiv_D#g; # \(di -> $\div$
s#\\\(\+\-#_Mpm_D#g; # \(+- -> $\pm$
s#\\\(no#_Mneg_D#g; # \(no -> $\neg$
s#\\\(\*\*#_Mast_D#g; # \(** -> $\ast$
s#\\\(eq#_D=_D#g; # \(eq -> $=$
s#\\\(>=#_Mgeq_D#g; # \(>= -> $\geq$
s#\\\(<=#_Mleq_D#g; # \(<= -> $\leq$
s#\\\(==#_Mequiv_D#g; # \(== -> $\equiv$
s#\\\(~=#_Msimeq_D#g; # \(~= -> $\simeq$
s#\\\(ap#_Msim_D#g; # \(ap -> $\sim$
s#\\\(!e#_Mneq_D#g; # \(!e -> $\neq$
s#\\\(\->#_Mrightarrow_D#g; # \(-> -> $\rightarrow$
s#\\\(<\-#_Mleftarrow_D#g; # \(<- -> $\leftarrow$
s#\\\(ua#_Muparrow_D#g; # \(ua -> $\uparrow$
s#\\\(da#_Mdownarrow_D#g; # \(da -> $\downarrow$
s#\\\(cu#_Mcup_D#g; # \(cu -> $\cup$
s#\\\(ca#_Mcap_D#g; # \(ca -> $\cap$
s#\\\(sb#_Msubset_D#g; # \(sb -> $\subset$
s#\\\(sp#_Msupset_D#g; # \(sp -> $\supset$
s#\\\(ib#_Msubseteq_D#g; # \(ib -> $\subseteq$
s#\\\(ip#_Msupseteq_D#g; # \(ip -> $\supseteq$
s#\\\(if#_Minfty_D#g; # \(if -> $\infty$
s#\\\(es#_Memptyset_D#g; # \(es -> $\emptyset$
s#\\\(is#_Mint_D#g; # \(is -> $\int$
s#\\\(pd#_Mpartial_D#g; # \(pd -> $\partial$
s#\\\(sr#_Msurd_D#g; # \(sr -> $\surd$
s#\\\(gr#_Mnabla_D#g; # \(gr -> $\nabla$
s#\\\(pt#_Mpropto_D#g; # \(pt -> $\propto$
s#\\\(mo#_Min_D#g; # \(mo -> $\in$
s#\\\(or#_Mmid_D#g; # \(or -> $\mid$
s#\\\((\d)(\d)#_D$1_Bover$2_D#g; # \(14 -> $1\over4$
s#\\\(m\.#_Mcdot_D#g; # \(m. -> $\cdot$
s#\\\(!s#_Mnot_Bsubset_D#g; # \(!s -> $\not\subset$
s#\\\(an#_Mwedge_D#g; # \(an -> $\wedge$
s#\\\(lo#_Mvee_D#g; # \(lo -> $\vee$
s#\\\(tf#_D_H._R_Braise.9ex_H._R_H._R_D#g;# \(tf -> .:.
s#\\\(cm#_Mni_D#g; # \(cm -> $\ni$
s#\\\(fa#_Mforall_D#g; # \(fa -> $\forall$
s#\\\(te#_Mexists_D#g; # \(te -> $\exists$
s#\\\(!m#_Mnotin_D#g; # \(!m -> $\notin$
s#\\\(a\+#_Moplus_D#g; # \(a+ -> $\oplus$
s#\\\(ax#_Motimes_D#g; # \(ax -> $\otimes$
s#\\\(ag#_Mangle_D#g; # \(ag -> $\angle$
s#\\\(rn#_Moverline_L _R_D#g; # \(rn -> $\overline{ }$
s#\\\(<<#_Mll_D#g; # \(<< -> $\ll$
s#\\\(>>#_Mgg_D#g; # \(>> -> $\gg$
s#\\\(<>#_Mleftrightarrow_D#g; # \(<> -> $\leftrightarrow$
s#\\\(//#_D/_D#g; # \(// -> $/$
s#\\\(L<#_Mlangle_D#g; # \(L< -> $\langle$
s#\\\(R>#_Mrangle_D#g; # \(R> -> $\rangle$
s#\\\(dm#_Mdiamond_D#g; # \(dm -> $\diamond$
s#\\\(lt#_Mlbrace_D#g; # \(lt -> $\lbrace$
s#\\\(rt#_Mrbrace_D#g; # \(rt -> $\rbrace$
s#\\\(lb#_Mlfloor_D#g; # \(lb -> $\lfloor$
s#\\\(rt#_Mrfloor_D#g; # \(rt -> $\rfloor$
s#\\\(lk#_Mlbrace_D#g; # \(lk -> $\lbrace$
s#\\\(rk#_Mrbrace_D#g; # \(rk -> $\rbrace$
s#\\\(lf#_Mlfloor_D#g; # \(lf -> $\lfloor$
s#\\\(rf#_Mrfloor_D#g; # \(rf -> $\rfloor$
s#\\\(lc#_Mlceil_D#g; # \(lc -> $\lceil$
s#\\\(rc#_Mrceil_D#g; # \(rc -> $\rceil$
s#\\\(bv#_Cmbox_Cboldmath_Mmid_D_R_R#g; # \(bv -> {\mbox{\boldmath$\mid$}}
s#\\\(bx#_Cvrule width.5em height.6em depth-.1em_R#g;
s#\\\(cf#^#g; # \(cf -> ^
s#\\\(al#_Maleph_D#g; # \(al -> $\aleph$
s#\\\(If#_MIm_D#g; # \(If -> $\Im$
s#\\\(Rf#_MRe_D#g; # \(Rf -> $\Re$
s#\\\(ws#_Mwp_D#g; # \(ws -> $\wp$
s#\\\(mt#_E_Bprime_D#g; # \(mt -> ${}^\prime$
s#\\\(sd#_E_L_Bprime_B!_Bprime_R_D#g; # \(sd -> ${}^{\prime\!\prime}$
s#\\\(pa#_BP#g; # \(pa -> \P
s#\\\(Cc#_Mclubsuit_D#g; # \(Cc -> $\clubsuit$
s#\\\(Cd#_Mdiamondsuit_D#g; # \(Cd -> $\diamondsuit$
s#\\\(Ch#_Mheartsuit_D#g; # \(Ch -> $\heartsuit$
s#\\\(Cs#_Mspadesuit_D#g; # \(Cs -> $\spadesuit$
s#\\\(bt#_Mperp_D#g; # \(bt -> $\perp$
s#\\\(<:#_MLeftarrow_D#g; # \(<: -> $\Leftarrow$
s#\\\(:>#_MRightarrow_D#g; # \(:> -> $\Rightarrow$
s#\\\(io#_MLeftrightarrow_D#g; # \(io -> $\Leftrightarrow$
s#\\\(u=#_MUparrow_D#g; # \(u= -> $\Uparrow$
s#\\\(d=#_MDownarrow_D#g; # \(d= -> $\Downarrow$
s#\\\(r1#_Mrightleftharpoons_D#g; # \(r1 -> $\rightleftharpoons$
s#\\\(r2#_Mleftharpoondown_D#g; # \(r2 -> $\leftharpoondown$
s#\\\(cr#_Mhookleftarrow_D#g; # \(cr -> $\hookleftarrow$
s#\\\(AL#_M_D#g; # \(AL ->
s#\\\(DL#_M_D#g; # \(DL ->
s#\\\(\*a#_Malpha_D#g; # \(*a -> $\alpha$
s#\\\(\*b#_Mbeta_D#g; # \(*b -> $\beta$
s#\\\(\*c#_Mxi_D#g; # \(*c -> $\xi$
s#\\\(\*d#_Mdelta_D#g; # \(*d -> $\delta$
s#\\\(\*e#_Mvarepsilon_D#g; # \(*e -> $\varepsilon$
s#\\\(\*f#_Mphi_D#g; # \(*f -> $\phi$
s#\\\(\*g#_Mgamma_D#g; # \(*g -> $\gamma$
s#\\\(\*h#_Mtheta_D#g; # \(*h -> $\theta$
s#\\\(\*i#_Miota_D#g; # \(*i -> $\iota$
s#\\\(\*k#_Mkappa_D#g; # \(*k -> $\kappa$
s#\\\(\*l#_Mlambda_D#g; # \(*l -> $\lambda$
s#\\\(\*m#_Mmu_D#g; # \(*m -> $\mu$
s#\\\(\*n#_Mnu_D#g; # \(*n -> $\nu$
s#\\\(\*o#_Do_D#g; # \(*o -> $o$
s#\\\(\*p#_Mpi_D#g; # \(*p -> $\pi$
s#\\\(\*q#_Mpsi_D#g; # \(*q -> $\psi$
s#\\\(\*r#_Mrho_D#g; # \(*r -> $\rho$
s#\\\(\*s#_Msigma_D#g; # \(*s -> $\sigma$
s#\\\(\*t#_Mtau_D#g; # \(*t -> $\tau$
s#\\\(\*u#_Mupsilon_D#g; # \(*u -> $\upsilon$
s#\\\(\*w#_Momega_D#g; # \(*w -> $\omega$
s#\\\(\*x#_Mchi_D#g; # \(*x -> $\chi$
s#\\\(\*y#_Meta_D#g; # \(*y -> $\eta$
s#\\\(\*z#_Mzeta_D#g; # \(*z -> $\zeta$
s#\\\(ts#_Mvarsigma_D#g; # \(ts -> $\varsigma$
s#\\\(\*C#_MXi_D#g; # \(*C -> $\Xi$
s#\\\(\*D#_MDelta_D#g; # \(*D -> $\Delta$
s#\\\(\*F#_MPhi_D#g; # \(*F -> $\Phi$
s#\\\(\*G#_MGamma_D#g; # \(*G -> $\Gamma$
s#\\\(\*H#_MTheta_D#g; # \(*H -> $\Theta$
s#\\\(\*L#_MLambda_D#g; # \(*L -> $\Lambda$
s#\\\(\*P#_MPi_D#g; # \(*P -> $\Pi$
s#\\\(\*Q#_MPsi_D#g; # \(*Q -> $\Psi$
s#\\\(\*R#_Crm_SP_R#g; # \(*R -> {\rm P}
s#\\\(\*S#_MSigma_D#g; # \(*S -> $\Sigma$
s#\\\(\*U#_Crm_SY_R#g; # \(*U -> {\rm Y}
s#\\\(\*W#_MOmega_D#g; # \(*W -> $\Omega$
s#\\\(\*Y#_Crm_SH_R#g; # \(*Y -> {\rm H}
s#\\\(\*(\w)#_Crm_S$1_R#g; # \(*_ -> {\rm _}
# from the -mm macros
s#\\\*\(Tm#_E_Crm_Buppercase_LTM_R_R_D#g;# \*(Tm -> ${}^{\rm\uppercase{TM}}$
# I am SO disgusted with troff. It seems that unless the -ms option is
# given, all accents are done in the -mm way e\*'. In fact, when the
# -ms option is given, only the original 7 accents are done postfix.
s#(ij)\\\*(['`])#_C$1_B$2_R#g; # i\*' -> {\'\i}
s#(ij)\\\*:#_C"_B$1_R#g; # i\*: -> {\"\i}
s#(ij)\\\*\^#_C_A_B$1_R#g; # i\*^ -> {\^\i}
s#(\w)\\\*(['`])#_C$2$1_R#g; # e\*' -> {\'e}
s#(\w)\\\*\^#_C_A$1_R#g; # e\*^ -> {\^e}
s#(\w)\\\*~#_C_T$1_R#g; # e\*~ -> {\~e}
s#(\w)\\\*:#_C"$1_R#g; # e\*: -> {\"e}
s#(\w)\\\*;#_C"$1_R#g; # U\*; -> {\"U}
s#(\w)\\\*,#_Cc_L$1_R_R#g; # e\*, -> {\c{e}}
# from the Berkeley -ms macros
s#\\\*\-#--#g; # \*- -> --
s#\\\*Q#``#g; # \*Q -> ``
s#\\\*U#''#g; # \*U -> ''
s#\\\*\(BU#_Mbullet_D#g; # \*(BU -> $\bullet$
s#\\\*\(EM#--#g; # \*(EM -> --
# changed in 0.8.1, from e\*' to \*'e.
# These only get used if the above fail (which they don't).
# An '-ms' option? What a hack..
s#\\\*(['`])(ij)#_C$2_B$1_R#g; # \*'i -> {\'\i}
s#\\\*:(ij)#_C"_B$1_R#g; # \*:i -> {\"\i}
s#\\\*\^(ij)#_C_A_B$1_R#g; # \*^i -> {\^\i}
s#\\\*(['`])(\w)#_C$1$2_R#g; # \*'e -> {\'e}
s#\\\*\^(\w)#_C_A$1_R#g; # \*^e -> {\^e}
s#\\\*~(\w)#_C_T$1_R#g; # \*~e -> {\~e}
s#\\\*C(\w)#_Cv_L$1_R_R#g; # \*Cc -> {\v{c}}
s#\\\*,(\w)#_Cc_L$1_R_R#g; # \*,e -> {\c{e}}
s#\\\*:(\w)#_C"$1_R#g; # \*:e -> {\"e}
s#(\w)\\\*v#_Cv_L$1_R_R#g; # c\*v -> {\v{c}}
s#(\w)\\\*_U#_C=$1_R#g; # e\*_ -> {\=e}
s#([Oo])\\\*/#_C$1_R#g; # o\*/ -> {\o}
s#(\w)\\\*\.#_Cd_L$1_R_R#g; # e\*. -> {\d{e}}
s#([Aa])\\\*o#_C$1$1_R#g; # a\*o -> {\aa}
s#\\\*([!?])#$1`#g; # \*? -> ?`
s#\\\*8#_Css_R#g; # \*8 -> {\ss}
s#\\\*3#_h_Blower.5ex_H3_R_R#g; # \*3 -> \hbox{\lower.5ex 3}
s#\\\*\(Th#_hI_Bhskip-.6ex_Braise.5ex_H_Mscriptscriptstyle_Bsupset_D_R_R#g;
s#\\\*\(th#_h_Clower.3ex_H_Blarge l_R_R_Bhskip-.52ex o_R#g;
s#\\\*D\-#_h_Booalign_L_LD_R_Bcrcr\n_Bhskip.2ex_Braise.25ex_H-_R_Bhfil_R_R#g;
s#\\\*d\-#_h_Booalign_L_Mpartial_D_Bcrcr\n_Bhskip.55ex_Braise.7ex_H-_R_Bhfil_R_R#g;
s#\\\*\(([AO])e#_C$1E_R#g; # \*(Ae -> {\AE}
s#\\\*\(([ao])e#_C$1e_R#g; # \*(ae -> {\ae}
s#\\\*q#_Cc_Lo_R_R#g; # \*q -> {\c{o}}
# International (Roman-8) symbols
s#\\\(\.\.#_C"_B _R#g; # \(.. -> {\"\ }
s#\\\(([AEIOUYaeouy]):#_C"$1_R#g; # \(A: -> {\"A}
s#\\\(([AEIOUaceouy])'#_C'$1_R#g; # \(A' -> {\'A}
s#\\\(([AEIOUaeouy])`#_C`$1_R#g; # \(A` -> {\`A}
s#\\\(([AEIOUaeouy])\^#_C_A$1_R#g; # \(A^ -> {\^A}
s#\\\(i:#_C"_Bi_R#g; # \(i: -> {\"\i}
s#\\\(i(['`])#_C$1_Bi_R#g; # \(i' -> {\'\i}
s#\\\(i\^#_C_A_Bi_R#g; # \(i^ -> {\^\i}
s#\\\(([ANOano])~#_C_T$1_R#g; # \(A~ -> {\~A}
s#\\\(([CcOo]),#_Cc_L$1_R_R#g; # \(c, -> {\c{c}}
s#\\\(([Ss])v#_Cv_L$1_R_R#g; # \(sv -> {\v{s}}
s#\\\(([Oo])/#_C$1_R#g; # \(O/ -> {\O}
s#\\\(ss#_Css_R#g; # \(ss -> {\ss}
s#\\\(L\-#_Cpounds_R#g; # \(L- -> {\pounds}
s#\\\(L=#_Cpounds_R#g; # \(L= -> {\pounds} # (Wrong!)
s#\\\(Y=#_h_Brm_Brlap=Y_R#g; # \(Y= -> \hbox{\rm\rlap=Y}
s#\\\(I([!?])#$1`#g; # \I! -> !`
s#\\\((AE|ae|OE|oe)#_C$1_R#g; # \(AE -> {\AE}
s#\\\(([Aa])o#_C$1$1_R#g; # \(Ao -> {\AA}
s#\\\(TH#_hI_Bhskip-.6ex_Braise.5ex_H_Mscriptscriptstyle_Bsupset_D_R_R#g;
s#\\\(th#_h_Clower.3ex_H_Blarge l_R_R_Bhskip-.52ex o_R#g;
s#\\\(D\-#_h_Booalign_L_LD_R_Bcrcr\n_Bhskip.2ex_Braise.25ex_H-_R_Bhfil_R_R#g;
s#\\\(d\-#_h_Booalign_L_Mpartial_D_Bcrcr\n_Bhskip.55ex_Braise.7ex_H-_R_Bhfil_R_R#g;
s#\\\(([ao])_U#_E_Cb_Cscriptsize $1_R_R_D#g;
# The "Scandinavian currency sign" is made with a bold \circ rlap'ed
# with 8 .'s. Big, long, and ugly, but the result is not too bad.
s#\\\(ox#_h_Booalign_Cmbox_Cboldmath_Mcirc_D_R_Bcrcr\n_Bhskip-.04ex_Braise.78ex_H._R_Bhfil_Bcrcr\n_Bhskip-.04ex_Braise.08ex_H._R_Bhfil_Bcrcr\n_c#g;
s#_c#_Bhskip.7ex_Braise.78ex_H._R_Bhfil_Bcrcr\n_Bhskip.7ex_Braise.08ex_H._R_Bhfil_Bcrcr\n_c#g;
s#_c#_Bhskip-.14ex_Braise.89ex_H._R_Bhfil_Bcrcr\n_Bhskip-.14ex_Braise-.02ex_H._R_Bhfil_Bcrcr\n_c#g;
s#_c#_Bhskip.8ex_Braise.89ex_H._R_Bhfil_Bcrcr\n_Bhskip.8ex_Braise-.02ex_H._R_Bhfil_Bcrcr\n_R_R#g;
# All the symbols from groff chars.tr that aren't listed above.
# What the heck is this?? not only is a\*: an a umlaut, but so is
# \(a: and also \(:a ! God, I wish troff would get it together!
# Oh, some people ignore all this and use \o to overlap it themselves!
# \(ao is Ao in Roman-8, and an o in groff.
# \(Cs is Cards Spades in Roman-8 and Currency Scandinavian in groff.
# How do I know which they meant??
s#\\\(bs##g; # \(bs -> (not implemented)
s#\\\(%0#_h%_Bhskip-.16ex_Blower.15ex_H_Bscriptsize 0_R_R#g;
s#\\\(f/#/#g; # \(f/ -> /
s#\\\(ha#_h_Braise.3em_H_Mscriptstyle_Bwedge_D_R_R#g;
s#\\\(ti#_Msim_D#g; # \(ti -> $\sim$
s#\\\(\-D#_h_Booalign_L_LD_R_Bcrcr\n_Bhskip.2ex_Braise.25ex_H-_R_Bhfil_R_R#g;
s#\\\(Sd#_h_Booalign_L_Mpartial_D_Bcrcr\n_Bhskip.8ex_Braise.7ex_H-_R_Bhfil_R_R#g;
s#\\\(TP#_hI_Bhskip-.6ex_Braise.5ex_H_Mscriptscriptstyle_Bsupset_D_R_R#g;
s#\\\(Tp#_h_Clower.3ex_H_Blarge l_R_R_Bhskip-.52ex o_R#g;
s#\\\(IJ#_LI_Bhskip-.2ex J_R#g; # \(IJ -> {I\hskip-.2ex J}
s#\\\(ij#_Li_Bhskip-.2ex j_R#g; # \(ij -> {i\hskip-.2ex j}
s#\\\('([ACEIOUaceou])#_C'$1_R#g; # \('A -> {\'A}
s#\\\(:([AEIOUYaeouy])#_C"$1_R#g; # \(:A -> {\"A}
s#\\\(\^([AEIOUaeou])#_C_A$1_R#g; # \(^A -> {\^A}
s#\\\(`([AEIOUaeou])#_C`$1_R#g; # \(`A -> {\`A}
s#\\\((['`])i#_C$1_Bi_R#g; # \('i -> {\'\i}
s#\\\(\^i#_C_A_Bi_R#g; # \(^i -> {\^\i}
s#\\\(:i#_C"_Bi_R#g; # \(:i -> {\"\i}
s#\\\(~([ANOano])#_C_T$1_R#g; # \(~A -> {\~A}
s#\\\(v([CcSsZz])#_Cv_L$1_R_R#g; # \(vs -> {\v{s}}
s#\\\(,([Cc])#_Cc_L$1_R_R#g; # \(,c -> {\c{c}}
s#\\\(/([OoLl])#_C$1_R#g; # \(/O -> {\O}
s#\\\(o([Aa])#_C$1$1_R#g; # \(oA -> {\AA}
s#\\\(a"#_CH_L _R_R#g; # \(a" -> {\H{ }}
s#\\\(a\-#_C=_L _R_R#g; # \(a- -> {\={ }}
s#\\\(a\.#_C._L _R_R#g; # \(a. -> {\.{ }}
s#\\\(a\^#_C_A_L _R_R#g; # \(a^ -> {\^{ }}
s#\\\(ab#_Cu_L _R_R#g; # \(ab -> {\u{ }}
s#\\\(ac#_Cc_L _R_R#g; # \(ac -> {\c{ }}
s#\\\(ad#_C"_L _R_R#g; # \(ad -> {\"{ }}
s#\\\(ah#_Cv_L _R_R#g; # \(ah -> {\v{ }}
s#\\\(a~#_C_T_L _R_R#g; # \(a~ -> {\~{ }}
s#\\\(ho#_Cc_L _R_R#g; # \(ho -> {\c{ }} # (wrong!)
s#\\\(\.([ij])#_C$1_R#g; # \(.i -> {\i}
s#\\\(Do#$#g; # \(Do -> $
s#\\\(Po#_Cpounds_R#g; # \(Po -> {\pounds}
s#\\\(Ye#_h_Brm_Brlap=Y_R#g; # \(Ye -> \hbox{\rm\rlap=Y}
s#\\\(Fo#_Mscriptscriptstyle_Bll_D#g; # \(Fo -> $\scriptscriptstyle\ll$
s#\\\(Fc#_Mscriptscriptstyle_Bgg_D#g; # \(Fc -> $\scriptscriptstyle\gg$
s#\\\(fo#_Mscriptscriptstyle_l_D#g; # \(fo -> $\scriptscriptstyle<$
s#\\\(fc#_Mscriptscriptstyle_g_D#g; # \(fc -> $\scriptscriptstyle>$
s#\\\(r([!?])#$1`#g; # \(r! -> !`
s#\\\(OK#_Cmbox_Cboldmath_Msurd_D_R_R#g; # \(OK ->{\mbox{\boldmath$\surd$}}
s#\\\(Of#_E_Cb_Cscriptsize a_R_R_D#g; # \(Of -> ${}^{\scriptsize a}}$
s#\\\(Om#_E_Cb_Cscriptsize o_R_R_D#g; # \(Om -> ${}^{\scriptsize o}}$
s#\\\(S(\d)#_E$1_D#g; # \(S1 -> ${}^1$
s#\\\(lA#_MLeftarrow_D#g; # \(lA -> $\Leftarrow$
s#\\\(rA#_MRightarrow_D#g; # \(rA -> $\Rightarrow$
s#\\\(hA#_MLeftrightarrow_D#g; # \(hA -> $\Leftrightarrow$
s#\\\(dA#_MDownarrow_D#g; # \(dA -> $\Downarrow$
s#\\\(uA#_MUparrow_D#g; # \(uA -> $\Uparrow$
s#\\\(vA#_MUpdownarrow_D#g; # \(vA -> $\Updownarrow$
s#\\\(va#_Mupdownarrow_D#g; # \(va -> $\updownarrow$
s#\\\(ba#_Chskip.4ex_Bvrule width.2ex height1.7ex depth0ex_R#g;
s#\\\(bb#_h_Bhskip.4ex_H_Booalign_Cvrule width.2ex height.5ex depth.4ex_Bcrcr\n_Bhfil_Braise.8ex_H_Bvrule width.2ex height.9ex depth0ex_R_Bhfil_R_R_R#g;
s#\\\(tm#_E_Crm_Buppercase_LTM_R_R_D#g; # \(tm -> ${}^{\rm\uppercase{TM}}$
s#\\\(ps#_BP#g; # \(ps -> \P
s#\\\(en#-#g; # \(en -> -
s#\\\(lB#_L_R[#g; # \(lB -> {}[
s#\\\(rB#]#g; # \(rB -> ]
s#\\\(lC#{#g; # \(lC -> {
s#\\\(rC#}#g; # \(rC -> }
s#\\\(la#_Mlangle_D#g; # \(la -> $\langle$
s#\\\(ra#_Mrangle_D#g; # \(ra -> $\rangle$
s#\\\(lq#``#g; # \(lq -> ``
s#\\\(rq#''#g; # \(rq -> ''
s#\\\(oq#`#g; # \(oq -> `
s#\\\(at#@#g; # \(at -> @
s#\\\(sh#\##g; # \(sh -> #
s#\\\(rs#_I#g; # \(rs -> $\backslash$
s#\\\(3d#_D_H._R_Braise.9ex_H._R_H._R_D#g;# \(3d -> .:.
s#\\\(~~#_Mapprox_D#g; # \(~~ -> $\approx$
s#\\\(!=#_Mneq_D#g; # \(!= -> $\neq$
s#\\\(=~#_Mcong_D#g; # \(=~ -> $\cong$
s#\\\(AN#_Mwedge_D#g; # \(AN -> $\wedge$
s#\\\(OR#_Mvee_D#g; # \(OR -> $\vee$
s#\\\(Ah#_Maleph_D#g; # \(Ah -> $\aleph$
s#\\\(Im#_MIm_D#g; # \(Im -> $\Im$
s#\\\(Re#_MRe_D#g; # \(Re -> $\Re$
s#\\\(md#_Mcdot_D#g; # \(md -> $\cdot$
s#\\\(nm#_Mnotin_D#g; # \(nm -> $\notin$
s#\\\(pp#_Mperp_D#g; # \(pp -> $\perp$
s#\\\(c\*#_Motimes_D#g; # \(c* -> $\otimes$
s#\\\(c\+#_Moplus_D#g; # \(c+ -> $\oplus$
s#\\\(\-h#_Mhbar_D#g; # \(-h -> $\hbar$
s#\\\(CL#_Mclubsuit_D#g; # \(CL -> $\clubsuit$
s#\\\(SP#_Mspadesuit_D#g; # \(SP -> $\spadesuit$
s#\\\(HE#_Mheartsuit_D#g; # \(HE -> $\heartsuit$
s#\\\(DI#_Mdiamondsuit_D#g; # \(DI -> $\diamondsuit$
s#\\\(CR#_Mhookleftarrow_D#g; # \(CR -> $\hookleftarrow$
s#\\\(st#_Mni_D#g; # \(st -> $\ni$
s#\\\(/_U#_Mangle_D#g; # \(/_ -> $\angle$
s#\\\(\-\+#_Mmp_D#g; # \(-+ -> $\mp$
s#\\\(nc#_Mnot_Bsupset_D#g; # \(nc -> $\not\supset$
s#\\\(ne#_Mnot_Bequiv_D#g; # \(ne -> $\not\equiv$
# misc
s#\\u([^\\]*)\\d#_Braisebox_L1ex_R_L$1_R#g;
s#\\d([^\\]*)\\u#_Braisebox_L-1ex_R_L$1_R#g;
s#\\z(.)#_Brlap_L$1_R#g; # \z|_ -> L
s#\\\*\(mm#mm#g; # \*(mm -> mm
s#\\#g; # \& ->
} # done with troff special chars
# finally, do eqn processing if they asked for it.
#
# This is very crude, and handles only the very simple eqn constructs.
# We should have some support for reading in eqn definitions rather
# than hard-coding some.
#
if ($handleeqn) {
local ($oldline);
# print STDERR "\nfrom: $_\n" if /@.*@/;
# replace @blah $\foo$ bar@ with @blah \foo bar@
1 while s/@([^@]* su[bp] [^@]*)_M([^@]*)_D([^@]*)@/@$1_B$2$3@/g;
1 while s/@([^@]*)_M([^@]*)_D([^@]* su[bp] [^@]*)@/@$1_B$2$3@/g;
while (/@.*@/) {
$oldline = $_;
s/@\s*roman\s+([^@]*)@/@$1@/g;
s/@\s*{\s*([^\s@]+)\s*sub\s+([^\s@]+)\s*}\s*sup\s+([^\s@]+)\s*([^@]*)@/_L_D_Crm_S$1_R__L$2_R_A_L$3_R_D_R@$4@/g;
s/@\s*([^\s@]+)\s*sub\s+([^\s@]+)\s*([^@]*)@/_L_D_Crm_S$1_R__L$2_R_D_R@$3@/g;
s/@\s*([^\s@]+)\s*sup\s+([^\s@]+)\s*([^@]*)@/_L_D_Crm_S$1_R_A_L$2_R_D_R@$3@/g;
s/_L_D_Crm_S""_R/_L_D_L_R/g; # handle @ "" sub 18 @
s/@mu@/_Mmu_D/g;
s/@angstrom@/_CAA_R/g;
s/@co2@/@CO sub 2@/g;
s/@no2@/@NO sub 2@/g;
s/@nox@/@NO sub x@/g;
s/@n2@/@N sub 2@/g;
s/@so2@/@SO sub 2@/g;
s/@so4@/@{SO sub 4} sup 2-@/g;
s/@no3@/@{NO sub 3} sup -@/g;
s/@hno3@/@HNO sub 3@/g;
if ($oldline eq $_) {
s/@\s*([^\s@]+)\s*([^@]*)@/_L$1_R@$2@/g;
}
s/@\s*@//g;
# print STDERR " to: $_\n";
}
}
# protect TeX characters
if ($protectTeX) {
s/\\/_I/g;
s/#/\\#/g;
s/\$/\\$/g;
s/%/\\%/g;
s/&/\\&/g;
s/{/_D\\lbrace_D/g;
s/}/_D\\rbrace_D/g;
s/\|/$|$/g;
s/</\$<$/g;
s/>/\$>$/g;
s/\^/\\^{}/g;
s/~/\\~{}/g;
}
# now convert our escaped characters back to their real selves
s/_B/\\/g;
s/_I/\$\\backslash$/g;
s/_C/{\\/g;
s/_S/ /g;
s/_L/{/g;
s/_R/}/g;
s/_l/</g;
s/_g/>/g;
s/_T/~/g;
s/_A/^/g;
s/_D/$/g;
s/_M/\$\\/g;
s/_V/|/g;
s/_E/\${}^/g;
s/_H/\\hbox{/g;
s/_h/\\leavevmode\n\\hbox{/g;
s/_U/\\_/g;
s/\n\n/\\par\n/g; # this is for fields that want paragraphs
return $_;
}
##########################################
# This converts IBMish control character combinations into troff
# This is new and mostly untested.
# Why troff? We convert ibm to troff, then troff to TeX. That
# way people can use this program to convert refer w/controls into
# plain refer. Or they can get the full blown refer->TeX.
#
# refer(c) -> refer r2b -n -der -ibm
# refer(c) -> TeX r2b -ibm
# refer(c) -> tib r2b -der -ibm
#
sub doibmtoroff {
    # Translate 8-bit "IBM PC" characters in a string into troff escapes.
    #
    # Argument: one string (a field or line of refer text).
    # Returns:  a copy of the string with each recognised character in the
    #           range \200-\376 replaced by a troff sequence.  The caller's
    #           variable is not modified; all work is done on a local $_.
    #
    # The table appears to follow the IBM PC character set (code page
    # 437/850 agree on every letter mapped here).  Characters in the range
    # that have no entry below are passed through untouched; a few with no
    # troff equivalent are deleted.  Every replacement is plain ASCII, so
    # the order of the substitutions does not matter.
    local($_) = @_;
    # Cheap guard: skip the whole table when no 8-bit character is present.
    if (/[\200-\376]/) {
        # use the -ms i\*' for accents, as all troff's can handle that.
        # I'd rather use \('i, but that works for groff, while eroff wants \(i'
        s/[\200]/C\\*,/g;       # C cedilla
        s/[\201]/u\\*:/g;       # u umlaut
        s/[\202]/e\\*'/g;       # e acute
        s/[\203]/a\\*^/g;       # a circumflex
        s/[\204]/a\\*:/g;       # a umlaut
        s/[\205]/a\\*`/g;       # a grave
        s/[\206]/a\\*o/g;       # a ring
        s/[\207]/c\\*,/g;       # c cedilla
        s/[\210]/e\\*^/g;       # e circumflex
        s/[\211]/e\\*:/g;       # e umlaut
        s/[\212]/e\\*`/g;       # e grave
        s/[\213]/i\\*:/g;       # i umlaut
        s/[\214]/i\\*^/g;       # i circumflex
        s/[\215]/i\\*`/g;       # i grave
        s/[\216]/A\\*:/g;       # A umlaut
        s/[\217]/A\\*o/g;       # A ring
        s/[\220]/E\\*'/g;       # E acute
        # \221 and \222 are the ae/AE ligatures.  They used to be deleted;
        # emit the -ms accent strings, which the troff-to-TeX pass of this
        # program turns into {\ae} and {\AE}.
        s/[\221]/\\*\(ae/g;     # ae ligature
        s/[\222]/\\*\(Ae/g;     # AE ligature
        s/[\223]/o\\*^/g;       # o circumflex
        s/[\224]/o\\*:/g;       # o umlaut
        s/[\225]/o\\*`/g;       # o grave
        s/[\226]/u\\*^/g;       # u circumflex
        s/[\227]/u\\*`/g;       # u grave
        s/[\230]/y\\*:/g;       # y umlaut
        # \231 and \232 are the UPPERCASE O and U umlauts; they were
        # previously emitted in lowercase, losing capitalisation.
        s/[\231]/O\\*:/g;       # O umlaut
        s/[\232]/U\\*:/g;       # U umlaut
        s/[\233]/\\\(ct/g;      # cent sign
        s/[\234]/\\\(L-/g;      # pound sterling
        s/[\235]/\\\(Y=/g;      # yen
        s/[\236]//g;            # should handle this
        s/[\237]//g;            # and this
        s/[\240]/a\\*'/g;       # a acute
        s/[\241]/i\\*'/g;       # i acute
        s/[\242]/o\\*'/g;       # o acute
        s/[\243]/u\\*'/g;       # u acute
        s/[\244]/n\\*~/g;       # n tilde
        s/[\245]/N\\*~/g;       # N tilde
        s/[\246]/\\\(a_/g;      # feminine ordinal
        s/[\247]/\\\(o_/g;      # masculine ordinal
        s/[\250]/\\*?/g;        # inverted question mark
        s/[\251]//g;            # no troff equivalent handled here; dropped
        s/[\252]/\\\(no/g;      # logical not
        s/[\253]/\\\(12/g;      # one half
        s/[\254]/\\\(14/g;      # one quarter
        s/[\255]/\\*!/g;        # inverted exclamation mark
        s/[\256]/\\\(<</g;      # much-less-than / left guillemet
        s/[\257]/\\\(>>/g;      # much-greater-than / right guillemet
        s/[\360]/\\\(==/g;      # identical to
        s/[\361]/\\\(+-/g;      # plus-minus
        s/[\362]/\\\(>=/g;      # greater-or-equal
        s/[\363]/\\\(<=/g;      # less-or-equal
        s/[\364]//g;            # no troff equivalent handled here; dropped
        s/[\365]//g;            # ditto
        s/[\366]/\\\(di/g;      # division sign
        s/[\367]/\\\(~~/g;      # approximately equal
        s/[\373]/\\\(sr/g;      # square root
        # I'm taking a guess that \376 is supposed to be the R set.
        s/[\376]/\\\(Re/g;
    }
    return $_;
}